Skip to contents

Roy 2025 Figures and tables

Introduction

  • Once you’ve downloaded all the requisite source information per the package’s instructions, you can build the package and re-create the Figures and Tables published in Roy 2025.
  • This vignette is provided to enable anyone to see the source data and methodology behind our publication.

Figure 1

1A - Continental US A11

Code
# Figure 1A: state-level population-adjusted genotypic frequency of A*11:01.
# Keep only the A*11:01 rows of the state-level frequency table.
state_frequencies <-
  CensusHLA::census_adjusted_nmdp_hla_frequencies_by_state |>
  dplyr::filter(allele == "A*11:01")

# Sum us_2020_nmdp_gf within each region / census_region / fips combination.
# `.groups = "drop"` returns an ungrouped table, so later steps do not inherit
# leftover grouping and dplyr does not print its regrouping message.
out_data <- state_frequencies |>
  dplyr::ungroup() |>
  dplyr::group_by(region, census_region, fips) |>
  dplyr::summarize(gf = sum(us_2020_nmdp_gf), .groups = "drop")

# Choropleth of the summed frequency; Alaska and Hawaii are left off the map.
gg_state <- usmap::plot_usmap(
  data = out_data,
  regions = "states",
  exclude = c("AK", "HI"),
  values = "gf",
  color = "black",
  linewidth = 0.1
) +
  viridis::scale_fill_viridis(option = "plasma", direction = 1)
Code
# Print the state-level map stored in gg_state.
gg_state

Code
# Interactive table (filterable, with CSV/Excel export buttons) of the values
# plotted on the state map, rounded to four decimals.
out_data |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::rename(
    State = region,
    `Census Region` = census_region,
    FIPS = fips,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1B - A11:01 by County

Code
# Figure 1B data: county-level population-adjusted genotypic frequency of
# A*11:01 for every state except Alaska and Hawaii.
info_by_county <- CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county |>
  dplyr::filter(!(state %in% c("Alaska", "Hawaii"))) |>
  dplyr::filter(allele == "A*11:01")

out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  # Drop the grouping straight away: `county` and `fips` are rewritten below
  # and must not still be grouping columns at that point.
  dplyr::summarize(gf = sum(us_2020_nmdp_gf), .groups = "drop") |>
  dplyr::filter(!is.na(gf)) |>
  # Split fips after its second character into STATEFP and COUNTYFP.
  dplyr::mutate(
    STATEFP = substr(fips, 1, 2),
    COUNTYFP = substr(fips, 3, nchar(fips))
  )

# Relabel Connecticut counties as planning regions and give them the matching
# planning-region FIPS codes; every other row keeps its census_region / fips.
out_data <- out_data |>
  dplyr::mutate(
    county = dplyr::case_when(
      state == "Connecticut" &
        census_region == "Litchfield County, Connecticut" ~
        "Northwest Hills Planning Region",
      state == "Connecticut" &
        census_region == "Hartford County, Connecticut" ~
        "Capitol Planning Region",
      state == "Connecticut" &
        census_region == "Middlesex County, Connecticut" ~
        "Lower Connecticut River Valley Planning Region",
      state == "Connecticut" &
        census_region == "Windham County, Connecticut" ~
        "Northeastern Connecticut Planning Region",
      state == "Connecticut" &
        census_region == "New Haven County, Connecticut" ~
        "South Central Connecticut Planning Region",
      # Fixed: the label previously read "New London Count, Connecticut", which
      # could never match.
      state == "Connecticut" &
        census_region == "New London County, Connecticut" ~
        "Southeastern Connecticut Planning Region",
      state == "Connecticut" &
        census_region == "Fairfield County, Connecticut" ~
        "Western Connecticut Planning Region",
      # Accept the label with and without the ", Connecticut" suffix that the
      # other Connecticut branches use.
      state == "Connecticut" &
        census_region %in% c("Tolland County", "Tolland County, Connecticut") ~
        "Capitol Planning Region",
      # NOTE(review): the three labels below carry no ", <state>" suffix, unlike
      # the Connecticut labels above -- confirm they match the data.
      census_region == "Doña Ana County" ~ "Donna Ana County",
      census_region == "Chugach Census Area" ~ "Valdez-Cordova Census Area",
      census_region == "Copper River Census Area" ~ "Valdez-Cordova Census Area",
      TRUE ~ census_region
    )
  ) |>
  dplyr::mutate(
    fips = dplyr::case_when(
      state == "Connecticut" & county == "Northwest Hills Planning Region" ~ "09160",
      state == "Connecticut" & county == "Greater Bridgeport Planning Region" ~ "09120",
      state == "Connecticut" & county == "Lower Connecticut River Valley Planning Region" ~ "09130",
      state == "Connecticut" & county == "Naugatuck Valley Planning Region" ~ "09140",
      state == "Connecticut" & county == "Northeastern Connecticut Planning Region" ~ "09150",
      state == "Connecticut" & county == "South Central Connecticut Planning Region" ~ "09170",
      state == "Connecticut" & county == "Southeastern Connecticut Planning Region" ~ "09180",
      state == "Connecticut" & county == "Western Connecticut Planning Region" ~ "09190",
      state == "Connecticut" & county == "Capitol Planning Region" ~ "09110",
      TRUE ~ fips
    )
  )

# County choropleth of the summed A*11:01 frequency, leaving out AK and HI.
gg_a11_by_county <- usmap::plot_usmap(
  regions = "counties",
  data = out_data,
  values = "gf",
  exclude = c("AK", "HI"),
  color = "black",
  linewidth = 0.1
) +
  viridis::scale_fill_viridis(direction = 1, option = "plasma")
Code
# Print the county-level A*11:01 map stored in gg_a11_by_county.
gg_a11_by_county

Code
# Interactive table of the county values behind the map: round the frequency
# to four decimals, then keep and relabel the five displayed columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1C - NMDP Correlations

NMDP values are not publicly available for privacy reasons. Please contact co-author Martin Maiers with inquiries.

1D - A11:01 by CA County

Code
# California rows for three HLA-A alleles from the county-level table.
info_by_county <- dplyr::filter(
  CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county,
  state %in% "California",
  allele %in% c("A*11:01", "A*02:01", "A*03:01")
)
Code
# Sum the A*11:01 frequency per California county and discard counties whose
# sum is NA.
out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::filter(allele == "A*11:01") |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  dplyr::summarize(gf = sum(us_2020_nmdp_gf)) |>
  dplyr::filter(!is.na(gf)) |>
  # STATEFP is the first two characters of fips, COUNTYFP is the remainder.
  dplyr::mutate(
    STATEFP = substr(fips, 1L, 2L),
    COUNTYFP = substring(fips, 3L)
  )

# County choropleth restricted to California.
gg_a11_in_ca <- usmap::plot_usmap(
  regions = "counties",
  include = "CA",
  data = out_data,
  values = "gf",
  color = "black",
  linewidth = 0.1
) +
  viridis::scale_fill_viridis(direction = 1, option = "plasma")

gg_a11_in_ca

Code
# Interactive table of the California county values: round the frequency to
# four decimals, then keep and relabel the five displayed columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1E - A11:01 in CA by H4 Hexagon

Code
# Print the p1 element of ca_4.
# NOTE(review): ca_4 is not created in any code shown on this page --
# presumably it is built in a hidden setup chunk; confirm.
ca_4$p1

Code
# Interactive table of the per-hexagon values held in ca_4: round the summed
# frequency to four decimals, then keep and relabel the displayed columns.
ca_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, digits = 4)) |>
  dplyr::select(
    `Hexagon ID` = hex,
    Allele = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

1F - A11:01 Catchment

Code
# Build the A*11:01 catchment-area plot from the table stored in
# CensusHLA::a11_catchment_summed.
# NOTE(review): plot_delNero2022_catchment_areas() is called without a
# namespace prefix, so it must already be attached (presumably from CensusHLA
# in a setup chunk not shown here) -- confirm. The data table is passed
# positionally after a named argument; its parameter name is not visible here.
gg_catchment <- plot_delNero2022_catchment_areas(
  query_allele = 'A*11:01',
  CensusHLA::a11_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed
)
Code
# Print the A*11:01 catchment plot stored in gg_catchment.
gg_catchment

  • For a table of A11:01 catchment, refer to Table 2 below.

Figure 2

2A - B58:01 by County

Code
# Figure 2A data: county-level population-adjusted genotypic frequency of
# B*58:01 for every state except Alaska and Hawaii.
info_by_county <- CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county |>
  dplyr::filter(!(state %in% c("Alaska", "Hawaii"))) |>
  dplyr::filter(allele == "B*58:01")

out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  # Drop the grouping straight away: `county` and `fips` are rewritten below
  # and must not still be grouping columns at that point.
  dplyr::summarize(gf = sum(us_2020_nmdp_gf), .groups = "drop") |>
  dplyr::filter(!is.na(gf)) |>
  # Split fips after its second character into STATEFP and COUNTYFP.
  dplyr::mutate(
    STATEFP = substr(fips, 1, 2),
    COUNTYFP = substr(fips, 3, nchar(fips))
  )

# Relabel Connecticut counties as planning regions and give them the matching
# planning-region FIPS codes; every other row keeps its census_region / fips.
out_data <- out_data |>
  dplyr::mutate(
    county = dplyr::case_when(
      state == "Connecticut" &
        census_region == "Litchfield County, Connecticut" ~
        "Northwest Hills Planning Region",
      state == "Connecticut" &
        census_region == "Hartford County, Connecticut" ~
        "Capitol Planning Region",
      state == "Connecticut" &
        census_region == "Middlesex County, Connecticut" ~
        "Lower Connecticut River Valley Planning Region",
      state == "Connecticut" &
        census_region == "Windham County, Connecticut" ~
        "Northeastern Connecticut Planning Region",
      state == "Connecticut" &
        census_region == "New Haven County, Connecticut" ~
        "South Central Connecticut Planning Region",
      # Fixed: the label previously read "New London Count, Connecticut", which
      # could never match.
      state == "Connecticut" &
        census_region == "New London County, Connecticut" ~
        "Southeastern Connecticut Planning Region",
      state == "Connecticut" &
        census_region == "Fairfield County, Connecticut" ~
        "Western Connecticut Planning Region",
      # Accept the label with and without the ", Connecticut" suffix that the
      # other Connecticut branches use.
      state == "Connecticut" &
        census_region %in% c("Tolland County", "Tolland County, Connecticut") ~
        "Capitol Planning Region",
      # NOTE(review): the three labels below carry no ", <state>" suffix, unlike
      # the Connecticut labels above -- confirm they match the data.
      census_region == "Doña Ana County" ~ "Donna Ana County",
      census_region == "Chugach Census Area" ~ "Valdez-Cordova Census Area",
      census_region == "Copper River Census Area" ~ "Valdez-Cordova Census Area",
      TRUE ~ census_region
    )
  ) |>
  dplyr::mutate(
    fips = dplyr::case_when(
      state == "Connecticut" & county == "Northwest Hills Planning Region" ~ "09160",
      state == "Connecticut" & county == "Greater Bridgeport Planning Region" ~ "09120",
      state == "Connecticut" & county == "Lower Connecticut River Valley Planning Region" ~ "09130",
      state == "Connecticut" & county == "Naugatuck Valley Planning Region" ~ "09140",
      state == "Connecticut" & county == "Northeastern Connecticut Planning Region" ~ "09150",
      state == "Connecticut" & county == "South Central Connecticut Planning Region" ~ "09170",
      state == "Connecticut" & county == "Southeastern Connecticut Planning Region" ~ "09180",
      state == "Connecticut" & county == "Western Connecticut Planning Region" ~ "09190",
      state == "Connecticut" & county == "Capitol Planning Region" ~ "09110",
      TRUE ~ fips
    )
  )

# County choropleth of the summed B*58:01 frequency, leaving out AK and HI.
gg_b58_by_county <- usmap::plot_usmap(
  regions = "counties",
  data = out_data,
  values = "gf",
  exclude = c("AK", "HI"),
  color = "black",
  linewidth = 0.1
) +
  viridis::scale_fill_viridis(direction = 1, option = "plasma")

gg_b58_by_county

2B - B58:01 in MS by County

Code
# Mississippi rows for B*58:01 from the county-level table.
info_by_county <- dplyr::filter(
  CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county,
  state %in% "Mississippi",
  allele %in% "B*58:01"
)
Code
# Sum the B*58:01 frequency per Mississippi county and discard counties whose
# sum is NA. `.groups = "drop"` returns an ungrouped table.
out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::filter(allele == "B*58:01") |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  dplyr::summarize(gf = sum(us_2020_nmdp_gf), .groups = "drop") |>
  dplyr::filter(!is.na(gf)) |>
  # Split fips after its second character into STATEFP and COUNTYFP.
  dplyr::mutate(
    STATEFP = substr(fips, 1, 2),
    COUNTYFP = substr(fips, 3, nchar(fips))
  )

# County choropleth restricted to Mississippi, with the legend on the right.
# theme() is namespace-qualified like every other call on this page, so the
# chunk does not depend on ggplot2 having been attached beforehand.
gg_b58_in_ms <-
  usmap::plot_usmap(
    data = out_data,
    regions = "counties",
    include = c("MS"),
    values = "gf",
    color = "black",
    linewidth = 0.1
  ) +
  viridis::scale_fill_viridis(option = "plasma", direction = 1) +
  ggplot2::theme(legend.position = "right")

gg_b58_in_ms

Code
# Interactive table of the Mississippi county values: round the frequency to
# four decimals, then keep and relabel the five displayed columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

2C - B58:01 in MS by Hexagon

Code
# Print the p1 element of ms_4.
# NOTE(review): ms_4 is not created in any code shown on this page --
# presumably it is built in a hidden setup chunk; confirm.
ms_4$p1

Code
# Interactive table of the per-hexagon values held in ms_4: round the summed
# frequency to four decimals, then keep and relabel the displayed columns.
ms_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, digits = 4)) |>
  dplyr::select(
    `Hexagon ID` = hex,
    Allele = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

2D - B58:01 Catchment

Code
# Build the B*58:01 catchment-area plot from the table stored in
# CensusHLA::b58_catchment_summed. This overwrites the gg_catchment object
# assigned in the Figure 1F chunk.
# NOTE(review): plot_delNero2022_catchment_areas() is called without a
# namespace prefix, so it must already be attached (presumably from CensusHLA
# in a setup chunk not shown here) -- confirm. The data table is passed
# positionally after a named argument; its parameter name is not visible here.
gg_catchment <- plot_delNero2022_catchment_areas(
  query_allele = 'B*58:01',
  CensusHLA::b58_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed
)
Code
# Print the B*58:01 catchment plot stored in gg_catchment.
gg_catchment

Tables

Table 1: United States 2020 Census Adjusted HLA-A*11:01 Genotypic Frequencies

Code
# Table 1: A*11:01 frequencies per NMDP race code joined onto the 2020 census
# population counts, shown as percentages rounded to one decimal.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    # NOTE(review): no `by` is given, so the join uses every column name the
    # two tables share (presumably nmdp_race_code) -- confirm and make explicit.
    CensusHLA::nmdp_hla_frequencies_by_race_us_2020_census_adjusted |>
      dplyr::filter(allele == "A*11:01") |>
      dplyr::select(
        allele,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      )
  ) |>
  # Sort after the join: left_join() keeps the row order of its left-hand
  # table, so sorting the right-hand table beforehand does not order the output.
  dplyr::arrange(dplyr::desc(us_2020_percent_pop)) |>
  # Convert the population share and both frequencies from fractions to
  # percentages and round them to one digit after the decimal point.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    `Ethnic Code` = nmdp_race_code,
    `Allele` = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population ` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    # Header spelling fixed (was "Calcualted").
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Table 2: HLA-A*11:01 Population-adjusted genotypic frequencies for top 11 NCI Catchment areas.

Code
# Table 2: A*11:01 catchment table ordered by patient_pop, which is the
# product total_2020_pop * us_2020_nmdp_gf_sum.
CensusHLA::a11_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  # NOTE(review): if this object is an sf data frame, select(-geometry) may not
  # remove the geometry column -- confirm the object type.
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(dplyr::desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      # Show 11 rows on the first page so it matches the "top 11" in the table
      # title and the page length used for Supplemental Table 4.
      pageLength = 11,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Tables

Supplemental Table 1 - California County population-adjusted HLA-A*11:01 Genotypic frequencies

Code
# California rows for three HLA-A alleles from the county-level table.
info_by_county <- dplyr::filter(
  CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county,
  state %in% "California",
  allele %in% c("A*11:01", "A*02:01", "A*03:01")
)

# Per-county A*11:01 totals (summed frequency and summed population), with
# NA-frequency counties removed and rows ordered by descending frequency.
out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::filter(allele == "A*11:01") |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  dplyr::summarize(
    gf = sum(us_2020_nmdp_gf),
    population = sum(total_2020_pop)
  ) |>
  dplyr::filter(!is.na(gf)) |>
  # STATEFP is the first two characters of fips, COUNTYFP is the remainder.
  dplyr::mutate(
    STATEFP = substr(fips, 1L, 2L),
    COUNTYFP = substring(fips, 3L)
  ) |>
  dplyr::arrange(desc(gf))
Code
# Interactive table of the California county values: round the frequency to
# four decimals, then keep and relabel the five displayed columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 2 - United States 2020 Census Adjusted HLA-B*58:01 Genotypic Frequencies for Mississippi

Code
# Supplemental Table 2: Mississippi B*58:01 frequencies per NMDP race code
# joined onto the census population table, shown as percentages rounded to one
# decimal.
# NOTE(review): the population columns come from us_pop_multirace_in_nmdp_codes;
# confirm they are the Mississippi counts that the table title implies.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    # NOTE(review): no `by` is given, so the join uses every column name the
    # two tables share (presumably nmdp_race_code) -- confirm and make explicit.
    CensusHLA::census_adjusted_nmdp_hla_frequencies_by_state |>
      dplyr::filter(allele == "B*58:01") |>
      dplyr::filter(census_region == "Mississippi") |>
      dplyr::select(
        allele,
        census_region,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      )
  ) |>
  # Sort after the join: left_join() keeps the row order of its left-hand
  # table, so sorting the right-hand table beforehand does not order the output.
  dplyr::arrange(dplyr::desc(us_2020_percent_pop)) |>
  # Convert the population share and both frequencies from fractions to
  # percentages and round them to one digit after the decimal point.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    `Region` = census_region,
    `Ethnic Code` = nmdp_race_code,
    `Allele` = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population ` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    # Header spelling fixed (was "Calcualted").
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 3 - Mississippi County population-adjusted HLA-B*58:01 Genotypic frequencies

Code
# Mississippi rows for B*58:01 from the county-level table.
info_by_county <- dplyr::filter(
  CensusHLA::census_adjusted_nmdp_hla_frequencies_by_county,
  state %in% "Mississippi",
  allele %in% "B*58:01"
)

# Per-county B*58:01 totals (summed frequency and summed population), with
# NA-frequency counties removed and rows ordered by descending frequency.
out_data <- info_by_county |>
  dplyr::ungroup() |>
  dplyr::filter(allele == "B*58:01") |>
  dplyr::group_by(region, state, census_region, county, fips, loci, allele) |>
  dplyr::summarize(
    gf = sum(us_2020_nmdp_gf),
    population = sum(total_2020_pop)
  ) |>
  dplyr::filter(!is.na(gf)) |>
  # STATEFP is the first two characters of fips, COUNTYFP is the remainder.
  dplyr::mutate(
    STATEFP = substr(fips, 1L, 2L),
    COUNTYFP = substring(fips, 3L)
  ) |>
  dplyr::arrange(desc(gf))
Code
# Interactive table of the Mississippi county values: round the frequency to
# four decimals, then keep and relabel the five displayed columns.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, digits = 4)) |>
  dplyr::select(
    `Census Region` = census_region,
    County = county,
    FIPS = fips,
    Allele = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

Supplemental Table 4 - HLA-B*58:01 Population-adjusted genotypic frequencies by NCI Catchment areas.

Code
# Supplemental Table 4: B*58:01 catchment table ordered by patient_pop, which
# is the product total_2020_pop * us_2020_nmdp_gf_sum; 11 rows per page.
CensusHLA::b58_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 11,
      dom = "Bfrtip",
      buttons = c("csv", "excel")
    )
  )

References